001 /* 002 * PairwiseAlignment.java 003 * 004 * Copyright 2003 Sergio Anibal de Carvalho Junior 005 * 006 * This file is part of NeoBio. 007 * 008 * NeoBio is free software; you can redistribute it and/or modify it under the terms of 009 * the GNU General Public License as published by the Free Software Foundation; either 010 * version 2 of the License, or (at your option) any later version. 011 * 012 * NeoBio is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 013 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 014 * PURPOSE. See the GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License along with NeoBio; 017 * if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, 018 * Boston, MA 02111-1307, USA. 019 * 020 * Proper attribution of the author as the source of the software would be appreciated. 021 * 022 * Sergio Anibal de Carvalho Junior mailto:sergioanibaljr@users.sourceforge.net 023 * Department of Computer Science http://www.dcs.kcl.ac.uk 024 * King's College London, UK http://www.kcl.ac.uk 025 * 026 * Please visit http://neobio.sourceforge.net 027 * 028 * This project was supervised by Professor Maxime Crochemore. 029 * 030 */ 031 032 package neobio.alignment; 033 034 import java.io.Serializable; 035 036 /** 037 * This class is the product of a pairwise alignment, generated by one subclasses of 038 * {@linkplain PairwiseAlignmentAlgorithm}. It contains the two sequences strings with 039 * gaps, a score tag line, and a score value. It is typically displayed in three rows as 040 * in the following example of an alignment between parts of two protein sequences: 041 * 042 * <CODE><BLOCKQUOTE><PRE> 043 * MDEIHQLEDMFTVDSETLRKVVKHFILPHD-----MRTTKHQEELWSFIAELDSLKDFMVEQE // sequence 1 044 * M +I E +FTV +ETL+ V KHFILP D MRTT++ +ELW FIA DSLK F+ EQ // score tag line 045 * MQQIENFEKIFTVPTETLQAVTKHFILP-DATETLMRTTQNPDELWEFIA--DSLKAFIDEQF // sequence 2 046 * </PRE></BLOCKQUOTE></CODE> 047 * 048 * <P>Each column has one character of each sequence and a score tag. The same character 049 * is displayed in all three rows when a column has an exact match (character of sequences 050 * 1 and 2 are equal). When a mismatch occurs (substitution of different characters), the 051 * score tag is left blank. A '+' in the score line signals a partial match (a 052 * substitution of similar characters). The difference between a partial match and a 053 * mismatch is that the score of a partial match is positive whereas the score of a 054 * mismatch is zero or negative (each case is determined by the scoring scheme).</P> 055 * 056 * <P>Gaps are usually represented by dashes ('-') and have a blank score tag. Insertions 057 * have dashes in sequence 1 and the inserted character in sequence 2. Deletions, by 058 * contrast, have the deleted character in sequence 1 and dashes in sequence 2.</P> 059 * 060 * <P>Each column carries a score value for the corresponding operation (as defined by the 061 * scoring scheme). The overall score of a pairwise alignment is the sum of all columns 062 * scores values.</P> 063 * 064 * <P>When the scoring schemes does not support partial matches, a match is usually 065 * signaled by a '|' character.<P> 066 * 067 * <P>Note that these special characters are defined by the 068 * <CODE>PairwiseAlignmentAlgorithm</CODE> class. Consult that class specification for the 069 * actual configuration. For instance, an alignment between two DNA fragmens may look like 070 * this:</P> 071 * 072 * <CODE><BLOCKQUOTE><PRE> 073 * A--C--TAAAAAGCA--TT-AATAATAAA-A 074 * | | |||| ||| || ||||| ||| | 075 * AAGCCCTAAACCGCAAGTTTAATAA-AAATA 076 * </PRE></BLOCKQUOTE></CODE> 077 * 078 * <P>This class is serializable, so it can be saved to a file (or any other output). It 079 * overrides the default <CODE>equals</CODE> method of the <CODE>Object</CODE> class to 080 * allow a proper comparsion of alignments produced by different algorithms or even 081 * different runs of the same algorithm. However, it does not override the 082 * <CODE>hashCode</CODE> method as it is generally the case to maintain the contract for 083 * the <CODE>hashCode</CODE> method (which states that equal objects must have equal hash 084 * codes). Hence, as it is, its use in a hash table is not supported.</P> 085 * 086 * @author Sergio A. de Carvalho Jr. 087 * @see PairwiseAlignmentAlgorithm 088 * @see PairwiseAlignmentAlgorithm#MATCH_TAG 089 * @see PairwiseAlignmentAlgorithm#APPROXIMATE_MATCH_TAG 090 * @see PairwiseAlignmentAlgorithm#MISMATCH_TAG 091 * @see PairwiseAlignmentAlgorithm#GAP_TAG 092 * @see PairwiseAlignmentAlgorithm#GAP_CHARACTER 093 * @see ScoringScheme 094 * @see ScoringScheme#isPartialMatchSupported 095 */ 096 public class PairwiseAlignment implements Serializable 097 { 098 /** 099 * First gapped sequence. 100 * 101 * @serial 102 */ 103 protected String gapped_seq1; 104 105 /** 106 * The score tag line. 107 * 108 * @serial 109 */ 110 protected String score_tag_line; 111 112 /** 113 * Second gapped sequence. 114 * 115 * @serial 116 */ 117 protected String gapped_seq2; 118 119 /** 120 * The overall score value for this alignment. 121 * 122 * @serial 123 */ 124 protected int score; 125 126 /** 127 * Creates a <CODE>PairwiseAlignment</CODE> instance with the specified gapped 128 * sequences, score tag line and score value. 129 * 130 * @param gapped_seq1 the first gapped sequence 131 * @param score_tag_line the score tag line 132 * @param gapped_seq2 the second gapped sequence 133 * @param score the overall score value for this alignment 134 */ 135 public PairwiseAlignment (String gapped_seq1, String score_tag_line, 136 String gapped_seq2, int score) 137 { 138 this.gapped_seq1 = gapped_seq1; 139 this.score_tag_line = score_tag_line; 140 this.gapped_seq2 = gapped_seq2; 141 this.score = score; 142 } 143 144 /** 145 * Returns the first gapped sequence. 146 * 147 * @return first gapped sequence 148 */ 149 public String getGappedSequence1 () 150 { 151 return gapped_seq1; 152 } 153 154 /** 155 * Returns the score tag line. 156 * 157 * @return score tag line 158 */ 159 public String getScoreTagLine () 160 { 161 return score_tag_line; 162 } 163 164 /** 165 * Returns the second gapped sequence. 166 * 167 * @return second gapped sequence 168 */ 169 public String getGappedSequence2 () 170 { 171 return gapped_seq2; 172 } 173 174 /** 175 * Returns the score for this alignment. 176 * 177 * @return overall score for this alignment 178 */ 179 public int getScore () 180 { 181 return score; 182 } 183 184 /** 185 * Returns a four-line String representation of this alignment in the following 186 * order: first gapped sequence, score tag line, second gapped sequence and the 187 * score value. 188 * 189 * @return a String representation of this scoring matrix 190 */ 191 public String toString () 192 { 193 return gapped_seq1 + "\n" + score_tag_line + "\n" 194 + gapped_seq2 + "\nScore: " + score; 195 } 196 197 /** 198 * Compares this object to the specified object. The result is <CODE>true</CODE> if 199 * and only if the argument is not <CODE>null</CODE> and is an 200 * <CODE>PairwiseAlignment</CODE> object that contains the same values as this object, 201 * i.e. the same gapped sequences, the same score tag line and the same score. 202 * 203 * @param obj the object to compare with 204 * @return <CODE>true</CODE> if objects are the same, <CODE>false</CODE> otherwise 205 */ 206 public boolean equals (Object obj) 207 { 208 if (!(obj instanceof PairwiseAlignment)) 209 return false; 210 211 PairwiseAlignment another_pa = (PairwiseAlignment) obj; 212 213 if (this.score != another_pa.score) 214 return false; 215 216 if (!this.gapped_seq1.equals(another_pa.gapped_seq1)) 217 return false; 218 219 if (!this.score_tag_line.equals(another_pa.score_tag_line)) 220 return false; 221 222 if (!this.gapped_seq2.equals(another_pa.gapped_seq2)) 223 return false; 224 225 return true; 226 } 227 }